Load helper functions

knitr::opts_chunk$set(fig.path = "figs/", message = FALSE, warnings = FALSE)
# load some helper functions
source('explore-JULES-ES-1p0_PPE_functions.R')

Load packages

  library(RColorBrewer)
  library(fields)
  library(MASS)
  library(DiceKriging)
  library(ncdf4)
  library(ncdf4.helpers)
  source("https://raw.githubusercontent.com/dougmcneall/packages-git/master/emtools.R")
  source("https://raw.githubusercontent.com/dougmcneall/packages-git/master/imptools.R")
  source("https://raw.githubusercontent.com/dougmcneall/packages-git/master/vistools.R")

Preliminaries

# Some pallete options
yg = brewer.pal(9, "YlGn")
ryb = brewer.pal(11, "RdYlBu")
byr = rev(ryb)
rb = brewer.pal(11, "RdBu")
br = rev(rb)
blues = brewer.pal(9, 'Blues')
cbPal <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# data location

ensloc <- '/project/carbon_ppe/JULES-ES-1p0_PPE/'
ensmember <- 'P0000/'
subdir <- 'stats/'

floc <- paste0(ensloc,ensmember,subdir)
fn <- 'JULES-ES-1p0_P0000_Annual_global.nc'
# test file
nc <- nc_open(paste0(floc,fn))

# What variables are in the file? 
varlist <- nc.get.variable.list(nc)
# for each of the variables in the file, average the last 20 years as the "modern" value,
# and then place in a matrix

modern_value <- function(nc, variable, ix){
  # A basic function to read a variable and 
  # take the mean of the timeseries at locations ix
  dat <- ncvar_get(nc, variable)
  out <- mean(dat[ix])
  out
}
#144:164 is the 1993:2013
modern_value(nc = nc, variable = "npp_nlim_lnd_mean", ix = 144:164)
## [1] 1.857912e-08
vec <- sapply(varlist, FUN = modern_value, nc = nc, ix = 144:164)
# Generate ensemble numbers
nens = 499

datmat <- matrix(nrow = nens, ncol = length(varlist))
colnames(datmat) <- varlist

enslist <- paste("P", formatC(0:(nens-1), width=4, flag="0"), sep="")

floc <- paste0(ensloc,ensmember,subdir)

for(i in 1:nens){
  
  vec <- rep(NA, length(varlist))
  
  ensmember <- enslist[i] 
  
  fn <- paste0(ensloc,ensmember,'/stats/','JULES-ES-1p0_',ensmember,'_Annual_global.nc')
  #print(fn)
  
  try(nc <- nc_open(paste0(fn)))
  try(vec <- sapply(varlist, FUN = modern_value, nc = nc, ix = 144:164))
  datmat[i, ] <- vec
  nc_close(nc)
}
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0069/stats/JULES-ES-1p0_P0069_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0086/stats/JULES-ES-1p0_P0086_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0144/stats/JULES-ES-1p0_P0144_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0165/stats/JULES-ES-1p0_P0165_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0168/stats/JULES-ES-1p0_P0168_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0183/stats/JULES-ES-1p0_P0183_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0185/stats/JULES-ES-1p0_P0185_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0214/stats/JULES-ES-1p0_P0214_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0216/stats/JULES-ES-1p0_P0216_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0235/stats/JULES-ES-1p0_P0235_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0238/stats/JULES-ES-1p0_P0238_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## [1] "vobjtovarid4: error #F: I could not find the requsted var (or dimvar) in the file!"
## [1] "var (or dimvar) name: residualFrac_lnd_sum"
## [1] "file name: /project/carbon_ppe/JULES-ES-1p0_PPE/P0242/stats/JULES-ES-1p0_P0242_Annual_global.nc"
## Error in vobjtovarid4(nc, varid, verbose = verbose, allowdimvar = TRUE) : 
##   Variable not found
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0251/stats/JULES-ES-1p0_P0251_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## [1] "vobjtovarid4: error #F: I could not find the requsted var (or dimvar) in the file!"
## [1] "var (or dimvar) name: residualFrac_lnd_sum"
## [1] "file name: /project/carbon_ppe/JULES-ES-1p0_PPE/P0253/stats/JULES-ES-1p0_P0253_Annual_global.nc"
## Error in vobjtovarid4(nc, varid, verbose = verbose, allowdimvar = TRUE) : 
##   Variable not found
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0259/stats/JULES-ES-1p0_P0259_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0285/stats/JULES-ES-1p0_P0285_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0357/stats/JULES-ES-1p0_P0357_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0376/stats/JULES-ES-1p0_P0376_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0387/stats/JULES-ES-1p0_P0387_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0394/stats/JULES-ES-1p0_P0394_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0405/stats/JULES-ES-1p0_P0405_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0424/stats/JULES-ES-1p0_P0424_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0455/stats/JULES-ES-1p0_P0455_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
## Error in R_nc4_open: No such file or directory
## Error in nc_open(paste0(fn)) : 
##   Error in nc_open trying to open file /project/carbon_ppe/JULES-ES-1p0_PPE/P0457/stats/JULES-ES-1p0_P0457_Annual_global.nc
## Error in R_nc4_inq_varndims: NetCDF: Not a valid ID
## Error in ncvar_ndims(ncid, varid) : error returned from C call
## Error in R_nc4_close: NetCDF: Not a valid ID
nlevel0.ix <- which(is.na(datmat[,3]))

# Y is the whole data set
Y <- datmat
# Y0 is the 'cleaned' data set, truncated to variables that change, and removing NAs
Y.level0 <- datmat[-nlevel0.ix, -c(2,30,31)]
Y.nlevel0 <- datmat[nlevel0.ix, -c(2,30,31)]
years = 1864:2013
ysec = 60*60*24*365
#norm.vec = c(1e12, 1e12, 1e12/ysec , 1e12, 1e12, 1e9)

# Load up the data
lhs_i = read.table('~/brazilCSSP/code/brazil_cssp/analyze_u-ao732/data/ES_PPE_i/lhs_u-ao732.txt', header = TRUE)
lhs_ii = read.table('~/brazilCSSP/code/brazil_cssp/analyze_u-ao732/data/ES_PPE_ii/lhs_u-ao732a.txt', header = TRUE)

toplevel.ix = 1:499

# The raw input data is a latin hypercube
lhs = rbind(lhs_i, lhs_ii)[toplevel.ix, ]
lhs.level0 <- lhs[-nlevel0.ix,]


X = normalize(lhs)
colnames(X) = colnames(lhs)

X.level0 <- X[-nlevel0.ix,]
X.nlevel0 <- X[nlevel0.ix,]

d = ncol(X)
# lower and higher bound on the normalised matrix for visualisation
rx = rbind(rep(0,32), rep(1,32))

Where did the model fail to run?

#simple way
pairs(X.nlevel0, xlim = c(0,1), ylim = c(0,1),col = 'red', gap = 0, lower.panel = NULL, pch = 20)

# plot failures over everything else
X.all <- rbind(X.level0, X.nlevel0)
colvec <- rep('grey', nrow(X))
colvec[(nrow(X.level0)+1):nrow(X.all)] <- 'red'

pairs(X.all, xlim = c(0,1), ylim = c(0,1),col = colvec, gap = 0, lower.panel = NULL, pch = 20, cex = 0.5)

par(mfrow = c(6,6), mar = c(1,1,1,1))
for(i in 1:d){
  
  plot(X[,i], datmat[,6])
  
}

Basic histograms of the output at Level 0 (places the model ran)

d = ncol(Y.level0)

par(mfrow = c(5,6))

for(i in 1:d){
  hist(Y.level0[,i], main = colnames(Y.level0)[i], xlab = '', ylab = '', col = 'lightgrey')
}

Test out some emulators

At the moment, we’ll just keep to the things that we know should work. We’ll use mean NPP as an example

p <- ncol(X.level0)

y <- Y.level0[,'npp_nlim_lnd_sum']

par(mfrow = c(5,7), mar = c(3,1,3,1))
for(i in 1:p){
  
  plot(X.level0[,i], y, xlab = '', ylab = '', main = colnames(X.level0)[i])
  
}

A DiceKriging emulator

em <- km(~., design = X.level0, response = y)
## 
## optimisation start
## ------------------
## * estimation method   : MLE 
## * optimisation method : BFGS 
## * analytical gradient : used
## * trend model : ~alpha_io + a_wl_io + bio_hum_cn + b_wl_io + dcatch_dlai_io + 
##     dqcrit_io + dz0v_dh_io + f0_io + fd_io + g_area_io + g_root_io + 
##     g_wood_io + gs_nvg_io + hw_sw_io + kaps_roth + knl_io + lai_max_io + 
##     lai_min_io + lma_io + n_inorg_turnover + nmass_io + nr_io + 
##     retran_l_io + retran_r_io + r_grow_io + rootd_ft_io + sigl_io + 
##     sorp + tleaf_of_io + tlow_io + tupp_io + l_vg_soil
## * covariance model : 
##   - type :  matern5_2 
##   - nugget : NO
##   - parameters lower bounds :  1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 
##   - parameters upper bounds :  2 2 2 2 2 2 1.998154 1.997409 2 2 2 2 2 2 2 2 2 1.999201 2 1.996693 2 2 2 2 2 2 1.995643 1.998832 2 2 2 2 
##   - best initial criterion value(s) :  -7024.504 
## 
## N = 32, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate     0  f=       7024.5  |proj g|=       1.8026
## At iterate     1  f =       6933.9  |proj g|=        1.9231
## At iterate     2  f =       6903.2  |proj g|=        1.8044
## At iterate     3  f =       6902.8  |proj g|=        1.8021
## At iterate     4  f =       6902.5  |proj g|=        1.8442
## At iterate     5  f =       6902.3  |proj g|=        1.8456
## At iterate     6  f =       6901.5  |proj g|=        1.8474
## At iterate     7  f =         6900  |proj g|=        1.8423
## At iterate     8  f =       6899.3  |proj g|=        1.7876
## At iterate     9  f =       6897.6  |proj g|=        1.6473
## At iterate    10  f =       6896.4  |proj g|=        1.5986
## At iterate    11  f =         6895  |proj g|=        1.4896
## At iterate    12  f =       6894.9  |proj g|=        1.8562
## At iterate    13  f =       6894.5  |proj g|=        1.8402
## At iterate    14  f =       6894.3  |proj g|=        1.4568
## At iterate    15  f =       6894.2  |proj g|=        1.7524
## At iterate    16  f =       6894.1  |proj g|=        1.7529
## At iterate    17  f =       6893.5  |proj g|=        1.3385
## At iterate    18  f =       6893.5  |proj g|=        1.8344
## At iterate    19  f =       6893.5  |proj g|=         1.326
## At iterate    20  f =       6893.3  |proj g|=        1.2883
## At iterate    21  f =       6893.1  |proj g|=        1.2415
## At iterate    22  f =         6893  |proj g|=        1.1971
## At iterate    23  f =       6892.9  |proj g|=        1.1642
## At iterate    24  f =       6892.8  |proj g|=        0.7639
## At iterate    25  f =       6892.7  |proj g|=        1.3311
## At iterate    26  f =       6892.7  |proj g|=       0.42227
## At iterate    27  f =       6892.7  |proj g|=       0.36679
## At iterate    28  f =       6892.7  |proj g|=        1.8255
## At iterate    29  f =       6892.7  |proj g|=       0.74016
## At iterate    30  f =       6892.7  |proj g|=       0.57575
## At iterate    31  f =       6892.7  |proj g|=       0.50008
## At iterate    32  f =       6892.7  |proj g|=       0.44632
## At iterate    33  f =       6892.7  |proj g|=       0.19297
## At iterate    34  f =       6892.7  |proj g|=       0.31146
## At iterate    35  f =       6892.7  |proj g|=        0.5553
## At iterate    36  f =       6892.7  |proj g|=       0.16631
## At iterate    37  f =       6892.7  |proj g|=       0.13603
## At iterate    38  f =       6892.7  |proj g|=       0.23918
## At iterate    39  f =       6892.7  |proj g|=       0.24905
## At iterate    40  f =       6892.7  |proj g|=       0.17555
## At iterate    41  f =       6892.7  |proj g|=       0.24901
## 
## iterations 41
## function evaluations 48
## segments explored during Cauchy searches 68
## BFGS updates skipped 0
## active bounds at final generalized Cauchy point 25
## norm of the final projected gradient 0.24901
## final function value 6892.69
## 
## F = 6892.69
## final  value 6892.689100 
## converged
plot(em)

ts.glmnet.em <- twoStep.glmnet(X = X.level0, y = y)
plot(ts.glmnet.em$emulator)

#test <- twoStep.glmnet(X = X)

#for(i in 1:ncol(Y0)){
  
#    em = twoStep.glmnet(X = X0, y = Y0[,i])
#     global.emlist[[i]] = em
#pred = predict(em$emulator, newdata = X.unif, type = 'UK')
#y.unif[,i] = pred$mean
#}